getExpression <- function(str) {
first <- substr(str, 1, 7)
second <- substr(str, 8, nchar(str))
second <- strsplit(second, "")[[1]]
seperators <- c(0, which(second %in% c("-", "+")))
expressed <- list()
for(i in 2:length(seperators)) {
if(second[seperators[i]] == "+") {
expressed[[i]] <- paste(second[(seperators[(i - 1)] + 1) : seperators[i]], collapse = '')
}
}
expressed <- paste(unlist(expressed), collapse = '')
expressed <- paste(first, expressed, sep = '')
return(expressed)
}
assign <- function(x) {
x$prop <- x$count / x$parentcount
assign <- as.numeric(by(x, x$subset, function(y) max(y$prop[y$stim != 0]) > min(y$prop[y$stim == 0])))
assign[assign == 1] <- -1
result <- data.frame(ptid = x$ptid[1], subset = unique(x$subset), assign = assign)
return(result)
}
# Loading Data --------------------------------
# hvtn <- read.csv(file = "data/merged_505_stats.csv")
# names(hvtn) <- tolower(names(hvtn))
# hvtn <- subset(hvtn, !is.na(ptid))
# saveRDS(hvtn, file = "data/505_stats.rds")
# Getting Demographic data ------------------------
demo <- read.csv(file = "data/primary505.csv")
infect <- data.frame(ptid = demo$ptid, status = demo$HIVwk28preunbl)
infect <- subset(infect, infect$ptid %in% hvtn$ptid)
# Getting marginals -----------------------------
library(flowReMix)
hvtn <- readRDS(file = "data/505_stats.rds")
length(unique(hvtn$name))
length(unique(hvtn$ptid))
length(unique(hvtn$population))
unique(hvtn$population)
unique(hvtn$stim)
nchars <- nchar(as.character(unique(hvtn$population)))
#marginals <- unique(hvtn$population)[nchars < 26]
marginals <- unique(hvtn$population)[nchars == 26]
marginals <- subset(hvtn, population %in% marginals)
marginals <- subset(marginals, stim %in% c("negctrl", "VRC ENV A",
"VRC ENV B", "VRC ENV C",
"VRC GAG B", "VRC NEF B",
"VRC POL 1 B", "VRC POL 2 B"))
marginals <- subset(marginals, !(population %in% c("4+", "8+")))
marginals <- subset(marginals, !(population %in% c("8+/107a-154-IFNg-IL2-TNFa-", "4+/107a-154-IFNg-IL2-TNFa-")))
marginals$stim <- factor(as.character(marginals$stim))
marginals$population <- factor(as.character(marginals$population))
# Descriptives -------------------------------------
library(ggplot2)
marginals$prop <- marginals$count / marginals$parentcount
# ggplot(marginals) + geom_boxplot(aes(x = population, y = log(prop), col = stim))
require(dplyr)
negctrl <- subset(marginals, stim == "negctrl")
negctrl <- summarize(group_by(negctrl, ptid, population), negprop = mean(prop))
negctrl <- as.data.frame(negctrl)
marginals <- merge(marginals, negctrl, all.x = TRUE)
# ggplot(subset(marginals, stim != "negctrl" & parent == "4+")) +
# geom_point(aes(x = log(negprop), y = log(prop)), size = 0.25) +
# facet_grid(stim ~ population, scales = "free") +
# theme_bw() +
# geom_abline(intercept = 0, slope = 1)
# Setting up data for analysis ---------------------------
unique(marginals$stim)
gag <- subset(marginals, stim %in% c("VRC GAG B", "negctrl"))
gag$subset <- factor(paste("gag", gag$population, sep = "/"))
gag$stimGroup <- "gag"
pol <-subset(marginals, stim %in% c("negctrl", "VRC POL 1 B", "VRC POL 2 B"))
pol$subset <- factor(paste("pol", pol$population, sep = "/"))
pol$stimGroup <- "pol"
env <- subset(marginals, stim %in% c("negctrl", "VRC ENV C", "VRC ENV B", "VRC ENV A"))
env$subset <- factor(paste("env", env$population, sep = "/"))
env$stimGroup <- "env"
nef <- subset(marginals, stim %in% c("negctrl", "VRC NEF B"))
nef$subset <- factor(paste("nef", nef$population, sep = "/"))
nef$stimGroup <- "nef"
subsetDat <- rbind(gag, pol, env, nef)
subsetDat$stim <- as.character(subsetDat$stim)
subsetDat$stim[subsetDat$stim == "negctrl"] <- 0
subsetDat$stim <- factor(subsetDat$stim)
# Converting subset names ------------------
subsets <- as.character(unique(subsetDat$subset))
expressed <- sapply(subsets, getExpression)
map <- cbind(subsets, expressed)
subsetDat$subset <- as.character(subsetDat$subset)
for(i in 1:nrow(map)) {
subsetDat$subset[which(subsetDat$subset == map[i, 1])] <- map[i, 2]
}
subsetDat$subset <- factor(subsetDat$subset)
# Getting outcomes -------------------------------
treatmentdat <- read.csv(file = "data/rx_v2.csv")
names(treatmentdat) <- tolower(names(treatmentdat))
treatmentdat$ptid <- factor(gsub("-", "", (treatmentdat$ptid)))
treatmentdat <- subset(treatmentdat, ptid %in% unique(subsetDat$ptid))
# Finding problematic subsets?
keep <- by(subsetDat, list(subsetDat$subset), function(x) mean(x$count > 1) > 0.02)
keep <- names(keep[sapply(keep, function(x) x)])
#result$subsets[result$qvals < 0.1] %in% keep
subsetDat <- subset(subsetDat, subset %in% keep)
subsetDat$subset <- factor(as.character(subsetDat$subset))
subsetDat <- merge(subsetDat, treatmentdat)
subsetDat <- merge(subsetDat, infect)
subsetDat$hiv <- subsetDat$status
subsetDat$hiv[subsetDat$control == 1] <- NA
subsetDat$vaccine <- 1 - subsetDat$control
# Fitting the model ------------------------------
library(flowReMix)
control <- flowReMix_control(updateLag = 5, nsamp = 60, initMHcoef = 2.5,
keepEach = 6, isingWprior = FALSE,
markovChainEM = FALSE, zeroPosteriorProbs = FALSE,
nPosteriors = 1, centerCovariance = FALSE,
maxDispersion = 10^3, minDispersion = 10^7,
randomAssignProb = 10^-8, intSampSize = 50,
isingInit = -log(99),
initMethod = "robust")
subsetDat$batch <- factor(subsetDat$batch..)
subsetDat$stimGroup <- factor(subsetDat$stimGroup)
subsetDat$batch <- factor(subsetDat$batch..)
subsetDat$stimGroup <- factor(subsetDat$stimGroup)
subsetDat <- data.frame(subsetDat %>% group_by(ptid,population,stim,stimGroup,parent) %>% filter(collection.num==max(collection.num)))
# saveRDS(subsetDat, file = "data/processedHVTN.rds")
# preAssign <- by(subsetDat, subsetDat$ptid, assign)
# preAssign <- do.call("rbind", preAssign)
fit <- flowReMix(cbind(count, parentcount - count) ~ stim,
subject_id = ptid,
cell_type = subset,
cluster_variable = stim,
data = subsetDat,
covariance = "sparse",
ising_model = "sparse",
regression_method = "robust",
iterations = 30,
parallel = FALSE,
verbose = TRUE, control = control,
newSampler = TRUE)
# Loading files -------------------
filenames <- as.list(c(dir(path = 'data analysis/results', pattern="hvtn_33__*")))
select1 <- sapply(filenames, function(x) length(grep("MC", x) > 0)) == 1
select2 <- sapply(filenames, function(x) length(grep("niter40", x) > 0)) == 1
select4 <- sapply(filenames, function(x) length(grep("prior2", x) > 0)) == 1
filenames <- filenames[select2 & select1 & select4]
filenames <- lapply(filenames, function(x) paste0('data analysis/results/', x))[-c(3, 4)]
post <- list()
for(i in 1:length(filenames)) {
fit <- readRDS(file = filenames[[i]])
post[[i]] <- fit$posteriors[, -1]
}
post <- Reduce("+", post) / length(filenames)
fit$data <- subsetDat
fit$posteriors[, -1] <- post
print(filenames)
hist(colMeans(fit$posteriors[, -1]))
# ROC plots -----------------------------
require(pROC)
rocResults <- summary(fit, type = "ROC",
target = vaccine, direction = ">", adjust = "BH",
sortAUC = FALSE)
rocResults[order(rocResults$auc, decreasing = TRUE), ]
vaccine <- outcome[, 2] == 0
hiv <- infect[, 2]
hiv[vaccine == 0] <- NA
infectROC <- summary(fit, type = "ROC",
target = hiv, direction = ">", adjust = "BH",
sortAUC = FALSE)
select <- infectROC$responseProb > 0.01
infectROC$qvalue[!select] <- NA
infectROC$qvalue[select] <- p.adjust(infectROC$pvalue[select], method = "BH")
infectROC[order(infectROC$auc, decreasing = TRUE), ]
sum(infectROC$qvalue < 0.05, na.rm = TRUE)
library(cowplot)
# save_plot(ising, filename = "figures/hvtnIsingStb1.pdf",
# base_width = 7, base_height = 6)
# load(file = "data analysis/results/hvtnAggreageRandom1.Robj")
# rand <- stability
# random <- plot(rand, threshold = 0.7, fill = rocResults$auc)
# random
# Posterior boxplots ------------------------
nfunctions <- sapply(strsplit(colnames(fit$posteriors)[-1], "+", fixed = TRUE), function(x) length(x) - 1)
weightList <- list()
weightList$Polyfunctionality <- weights <- nfunctions / choose(5, nfunctions)
# weightList$functionality <- rep(1, length(nfunctions))
# All
allbox <- plot(fit, type = "boxplot", target = hiv,
groups = "all", weights = weightList,
one_sided = TRUE,
test = "wilcoxon", jitter = TRUE)
allbox
# save_plot("figures/HVTNallboxplot.pdf", allbox)
stimgroups <- lapply(split(subsetDat$subset, subsetDat$stimGroup), unique)
stimbox <- plot(fit, type = "boxplot", target = hiv,
groups = stimgroups, weights = weightList,
test = "wilcoxon", jitter = FALSE, adjust = "BH",
ncol = 2, one_sided = TRUE)
stimbox
# save_plot("figures/hvtnStimBox2.pdf", stimbox,
# base_width = 9, base_height = 6)
parents <- lapply(split(subsetDat$subset, subsetDat$parent), unique)[-1]
parentbox <- plot(fit, type = "boxplot", target = hiv,
groups = parents, weights = weightList,
test = "wilcoxon", jitter = FALSE,
ncol = 2)
parentbox
# save_plot("figures/hvtnParentBox2.pdf", parentbox,
# base_width = 9, base_height = 5)
stimparent <- lapply(split(subsetDat$subset, paste(subsetDat$stimGroup, subsetDat$parent, sep = "/")), unique)
stimparentbox <- plot(fit, type = "boxplot", target = hiv,
groups = stimparent, weights = weightList,
test = "wilcoxon", jitter = FALSE,
ncol = 3)
stimparentbox
# save_plot("figures/hvtnParentStimBox2.pdf", parentstimbox,
# base_width = 10, base_height = 8)
# Stability Graphs ---------------
# system.time(stab <- stabilityGraph(fit, type = "ising", gamma = 0.25, cpus = 1, reps = 100))
# saveRDS(stab, file = "data analysis/results/hvtn_stab_5_niter30npost1seed3sa06.rds")
# fit$assignmentList <- NULL
# fit$randomEffectSamp <- NULL
# saveRDS(fit, file = "data analysis/results/hvtn_5_niter30npost1seed3sa06.rds")
stab <- fit$stabilityGraph
edges <- 10
<<<<<<< HEAD
ising <- plot(stab, threshold = 0.9, fill = rocResults$auc)
=======
ising <- plot(stab, nEdges = 75, fill = rocResults$auc)
>>>>>>> memoryFix-
ising
ising <- plot(stab, threshold = 0.9, fill = infectROC$auc, layout = "kamadakawai")
ising
# Scatter plots -----------------------
scatter <- plot(fit, target = vaccine, type = "scatter", ncol = 11)
# save_plot("figures/HVTNscatter.pdf", scatter,
# base_height = 20,
# base_width = 22, limitsize = FALSE)
# FDR curves ----------------
fdrplot <- plot(fit, target = vaccine, type = "FDR")
# save_plot("figures/hvtnFDRplot.pdf", fdrplot,
# base_height = 20,
# base_width = 22, limitsize = FALSE)
# Boxplots for graph clusters -----------
groups <- getGraphComponents(stability, threshold = 0.65, minsize = 4)
weightList$Polyfunctionality <- weights <- nfunctions / choose(5, nfunctions)
names(groups) <- c("107+", "Large CD4+", "env/8+")
# pvals <- numeric(length(groups))
# weights <- list()
# weights[[1]] <- rep(1, ncol(fit$posteriors) - 1)
# names(weights) <- "Aggregate"
# post <- fit$posteriors[, -1]
# for(i in 1:length(groups)) {
# group <- groups[[i]]
# w <- weightList[[1]]
# sub <- colnames(post) %in% group
# score <- 1 - apply(post[, sub], 1, function(x) weighted.mean(x, w[sub]))
# glmfit <- glm(hiv ~ score, family = "binomial")
# pvals[i] <- summary(glmfit)$coefficients[2, 4] / 2
# print(roc(hiv ~ score)$auc)
# }
# names(groups) <- paste(names(groups), "pvalue:", round(pvals, 4))
clusterbox <- plot(fit, type = "boxplot", target = infection, groups = groups, ncol = 3,
weights = weightList, test = "t-test",
one_sided = TRUE)
# save_plot("figures/HVTNisingBoxplotBio.pdf", clusterbox,
# base_width = 8, base_height = 3)
# Posterior probabilities for nresponses ----------------
infect$hiv <- "non-infected"
infect$hiv[infect$status == 1] <- "infected"
infect$hiv[outcome[, 2] == 1] <- "placebo"
assignments <- fit$assignmentList
names(assignments) <- substr(names(assignments), 1, 9)
assignments <- lapply(unique(names(assignments)), function(x) {
do.call("rbind", assignments[names(assignments) == x])
})
post <- data.frame(t(sapply(assignments, colMeans)))
post <- cbind(1:nrow(post), post)
subsets <- names(fit$coefficients)
nfunctions <- sapply(subsets, function(x) length(strsplit(x, "+", fixed = TRUE)[[1]]) - 1)
weightList <- list()
weightList$poly <- weights <- nfunctions / choose(5, nfunctions)
weightList$func <- rep(1, length(nfunctions))
resultList <- list()
type <- "poly"
for(w in 1:length(weightList)) {
weights <- weightList[[w]]
weightname <- names(weightList)[w]
resultList[[w]] <- list()
print(weightname)
for(i in 1:(length(assignments) + 3)) {
cat(i, " ")
if(i <= length(assignments)) {
samp <- assignments[[i]]
ptid <- fit$posteriors$ptid[i]
} else if(i == (length(assignments) + 1)) {
ptid <- "non-infected"
samp <- do.call("rbind", assignments[infect[, 3] == "non-infected"])
} else if(i == (length(assignments) + 2)){
ptid <- "placebo"
samp <- do.call("rbind", assignments[infect[, 3] == "placebo"])
} else {
ptid <- "infected"
samp <- do.call("rbind", assignments[infect[, 3] == "infected"])
}
#colnames(samp) <- substring(subsets, 1, 3) # for stim groups
#colnames(samp) <- substring(subsets, 1, 6) # for stim + 8 or 4 groups
#colnames(samp) <- substring(subsets, 5, 6) # + 8 or 4 groups
colnames(samp) <- substring(subsets, 5) # for cell-type groups
#colnames(samp) <- rep("all", ncol(samp)) # for just one plot
groups <- unique(colnames(samp))
subjDatList <- list()
if(type != "breadth") {
for(j in 1:length(groups)) {
group <- groups[j]
subsamp <- samp[, colnames(samp) == group]
subw <- weights[colnames(samp) == group]
subw <- subw / sum(subw)
groupSize <- ncol(subsamp)
if(length(subw) == 1) {
values <- subsamp * subw
} else {
values <- subsamp %*% subw
}
uniqueValues <- c(0, sort(unique(values)), 1)
props <- sapply(uniqueValues, function(x) mean(values >= x))
subjDatList[[j]] <- data.frame(ptid = ptid, group = group,
index = weightname,
presponses = uniqueValues,
postProb = props)
}
} else {
templist <- list()
for(j in 1:length(groups)) {
group <- groups[j]
subsamp <- samp[, colnames(samp) == group]
groupSize <- ncol(subsamp)
weights <- seq(from = 5, to = 1,length.out = groupSize)#(groupSize:1)^0.75
weights <- weights / sum(weights)
weights <- cumsum(weights)
map <- cbind(0:groupSize, c(0, weights))
values <- map[apply(subsamp, 1, sum), 2]
templist[[j]] <- values
}
templist <- do.call("cbind", templist)
parents <- substr(groups, 5, 6)
parentGroups <- c("4+", "8+")
for(j in 1:length(parentGroups)) {
parent <- parentGroups[j]
values <- rowMeans(templist[, parents == parent])
uniqueVals <- sort(unique(c(values, 0, 1)), decreasing = TRUE)
props <- sapply(uniqueVals, function(x) mean(values >= x))
subjDatList[[j]] <- data.frame(ptid = ptid, group = parent,
index = "breadth",
presponses = uniqueVals,
postProb = props)
}
}
resultList[[w]][[i]] <- do.call("rbind", subjDatList)
}
cat("\n")
}
responsedat <- do.call("rbind", do.call("rbind", resultList))
forplot <- merge(responsedat, infect, by.x = "ptid", by.y = "ptid",
all.x = TRUE)
summarized <- subset(forplot, ptid %in% c("infected", "non-infected", "placebo"))
forplot <- subset(forplot, !(ptid %in% c("infected", "non-infected", "placebo")))
forplot <- forplot[order(forplot$ptid, forplot$group, forplot$presponses), ]
forplot$parent <- substr(forplot$group, 5, 6)
forplot$protein <- substr(forplot$group, 1, 3)
summarized$parent <- substr(summarized$group, 5, 6)
summarized$protein <- substr(summarized$group, 1, 3)
temp <- subset(forplot, index == "poly")
sumtemp <- subset(summarized, index == "poly")
ggplot(temp) + geom_step(aes(x = presponses, y = 1 - postProb, group = ptid,
col = factor(hiv)),
alpha = 0.42) +
geom_step(data = sumtemp, aes(x = presponses, y = 1 - postProb,
linetype = ptid), size = 0.7) +
facet_wrap( ~ group) + theme_bw() +
xlab("At Least % Responsive Subsets") +
ylab("Posterior Probabilities") #+
integrateCDF <- function(x) {
x <- x[, 3:4]
result <- 0
for(i in 2:nrow(x)) {
base <- (x[i, 1] - x[i - 1, 1])
result <- result + base * (x[i, 2])
#result <- result + base * (x[i - 1, 2] - x[i, 2]) / 2
}
return(result)
}
intCDF <- summarize(group_by(forplot, ptid, group, index, hiv),
measure = integrateCDF(cbind(ptid, group, presponses, postProb)))
intCDF <- do.call("rbind", by(intCDF, list(intCDF$group, intCDF$index),
function(x) {
measure <- x$measure
x$measure <- (measure - mean(measure)) / sd(measure)
return(x)
}))
par(mfrow = c(1, 2), mar = rep(3, 3))
temp <- subset(intCDF, hiv != "placebo")
aucs <- by(temp, list(temp$group, temp$index), function(x) {
print(unique(x$group))
rocfit <- roc(x$hiv == "infected" ~ x$measure)
auc <- round(rocfit$auc, 3)
main <- paste(unique(x$group), unique(x$index), "AUC:", auc)
plot(rocfit, main = main)
return(auc)
})
n0 <- sum(infect$hiv == "infected")
n1 <- sum(infect$hiv == "non-infected")
pvals <- pwilcox(aucs * n0 * n1, n0, n1, lower.tail = FALSE)
ggplot(subset(intCDF, index != "!!"),
aes(x = hiv, y = measure, col = hiv)) +
geom_boxplot(outlier.shape = NA) +
geom_jitter(size = 0.1) +
facet_grid(index ~ group, scales= "free_y") +
theme_bw() + ylim(-2.5, 2.5)
# Breadth X Functionality Tensor -----------------
library(viridis)
library(ggthemes)
library(dplyr)
library(reshape2)
library(ggplot2)
infect$hiv <- "non-infected"
infect$hiv[infect$status == 1] <- "infected"
infect$hiv[outcome[, 2] == 1] <- "placebo"
post <- fit$posteriors
post <- melt(post, id = "ptid")
names(post)[2:3] <- c("combination", "posterior")
post$subset <- substring(post$combination, 5)
post$stim <- substring(post$combination, 1, 3)
post <- merge(post, infect, all.x = TRUE)
post$hiv <- factor(post$hiv, levels = c("non-infected", "infected", "placebo"))
summ <- summarize(group_by(post, combination, subset, stim, hiv),
mposterior = 1 - mean(posterior))
sig <- result$qvals < 0.05
sig <- data.frame(combination = result$subsets, sig = sig)
summ <- merge(summ, sig, all.x = TRUE)
summ$sig[!summ$sig] <- NA
summ$vaccine <- "vaccine"
summ$vaccine[summ$hiv == "placebo"] <- "placebo"
summ$vaccine <- factor(summ$vaccine, level = c("vaccine", "placebo"))
onlysig <- subset(summ, sig)
ggplot(summ, aes(x = stim, y = subset, fill = mposterior)) +
geom_tile(color = "white", size = 0.1) +
geom_tile(data = onlysig, color = "red", size = 0.5) +
scale_fill_viridis(name="posterior") +
facet_wrap(~ vaccine) +
theme_tufte(base_family="Helvetica") +
theme(axis.text.y = element_text(size = 5.5))
sig <- infectresult$infectQvals < 0.1
sig <- data.frame(combination = infectresult$subsets, sig = sig)
summ <- summ[, -which(colnames(summ) == "sig")]
summ <- merge(summ, sig, all.x = TRUE)
summ$sig[!summ$sig] <- NA
temp <- subset(summ, hiv != "placebo")
temp$hiv <- factor(temp$hiv, levels = c("non-infected", "infected"))
onlysig <- subset(temp, sig)
ggplot(temp, aes(x = stim, y = subset, fill = mposterior)) +
geom_tile(color = "white", size = .1) +
geom_tile(data = onlysig, aes(x = stim, y = subset),
col = "red", size = 0.5) +
scale_fill_viridis(name="posterior") +
facet_wrap(~ hiv) +
theme_tufte(base_family="Helvetica") +
theme(axis.text.y = element_text(size = 5.5))
# Scatter plots for problematic subsets ------------
require(reshape2)
outcome <- treatmentdat[, c(13, 15)]
posteriors <- fit$posteriors
posteriors <- melt(posteriors, id.vars = c("ptid"))
names(posteriors)[2:3] <- c("subset", "posterior")
posteriors <- merge(posteriors, outcome,
by.x = "ptid", by.y = "ptid",
all.x = TRUE)
forplot <- merge(subsetDat, posteriors, all.x = TRUE,
by.x = c("ptid", "subset"),
by.y = c("ptid", "subset"))
pop <- "8+/107a-154+IFNg+IL2-TNFa+"
pop <- "8+/107a-154+IFNg+IL2+TNFa+"
pop <- "4+/107a+154+IFNg-IL2+TNFa-"
pop <- "4+/107a+154-IFNg-IL2-TNFa+"
pop <- "8+/107a-154+IFNg+IL2-TNFa-"
pop <- "8+/107a-154+IFNg+IL2+TNFa-"
pop <- "8+/107a-154+IFNg+IL2-TNFa-"
pop <- "8+/107a-154+IFNg-IL2-TNFa-"
pop <- "8+/107a+154-IFNg-IL2+TNFa+"
pop <- "8+/107a+154-IFNg-IL2+TNFa+"
pop <- "8+/107a+154-IFNg-IL2+TNFa-"
pop <- "8+/107a+154-IFNg+IL2+TNFa-"
temp <- subset(forplot, population == pop)
ggplot(temp) +
geom_point(aes(x = log(negprop + 1 / parentcount), y = log(prop + 1 / parentcount),
col = 1 - posterior), size = 0.25) +
facet_wrap( ~ stimGroup, scales = "free", nrow = 2) +
theme_bw() +
geom_abline(intercept = 0, slope = 1) +
scale_colour_gradientn(colours = rainbow(4)) +
ggtitle(pop)
sort(temp$prop, decreasing = TRUE)
# Weighting posterior by variances -----
post <- fit$posteriors
post[, 2:ncol(post)] <- 1 - post[2:ncol(post)]
post <- melt(post, id = "ptid")
names(post) <- c("ptid", "subset", "posterior")
post$parent <- substr(post$subset, 5, 6)
post$stim <- substr(post$subset, 1, 3)
post$group <- substr(post$subset, 1, 6)
post$group <- 1
weights <- apply(fit$posteriors[, -1], 2, var)
nfunctions <- sapply(subsets, function(x) length(strsplit(x, "+", fixed = TRUE)[[1]]) - 1)
weights <- weights * nfunctions / choose(5, nfunctions)
weights <- weights / sum(weights)
weights <- data.frame(subset = colnames(fit$posteriors)[-1],
weights = weights)
post <- merge(post, weights, all.x = TRUE)
score <- summarize(group_by(post, ptid, group),
score = weighted.mean(posterior, weights))
score <- merge(score, infect, all.x = TRUE)
ggplot(score) + geom_boxplot(aes(x = hiv, y = score, col = hiv)) +
facet_wrap(~ group) + theme_bw()
par(mfrow = c(1, 1))
by(score, score$group, function(x) {
rocfit <- roc(x$hiv != "placebo" ~ x$score)
plot(rocfit, main = paste(unique(x$group), round(rocfit$auc, 3)))
})
par(mfrow = c(1, 1))
temp <- subset(score, hiv != "placebo")
by(temp, temp$group, function(x) {
rocfit <- roc(x$hiv ~ x$score)
plot(rocfit, main = paste(unique(x$group), round(rocfit$auc, 3)))
})
# Grouping by stim ---------------------------
assignments <- fit$assignmentList
names(assignments) <- substr(names(assignments), 1, 9)
assignments <- lapply(unique(names(assignments)), function(x) {
do.call("rbind", assignments[names(assignments) == x])
})
sapply(assignments, function(samp) mean(apply(samp, 1, function(x) sum(x == 0) < 40)))
subsets <- names(fit$coefficients)
summarizeBySubset <- function(samp, subsets, threshold = 0) {
colnames(samp) <- substring(subsets, 5) # for cell-type groups
#colnames(samp) <- substring(subsets, 1, 6) # for stim groups
result <- sapply(unique(colnames(samp)), function(x) {
index <- colnames(samp) == x
t(apply(samp, 1, function(y) sum(y[index]) > threshold))
})
}
collapsed <- lapply(assignments, summarizeBySubset, subsets, threshold = 0)
# AUC
posteriors <- matrix(nrow = length(collapsed), ncol = ncol(collapsed[[1]]))
for(i in 1:length(collapsed)) posteriors[i, ] <- colMeans(collapsed[[i]])
posteriors <- data.frame(posteriors)
posteriors$ptid <- fit$posteriors[, 1]
colnames(posteriors) <- c(colnames(collapsed[[1]]), "ptid")
outcome <- treatmentdat[, c(13, 15)]
posteriors <- merge(posteriors, outcome,
by.x = "ptid", by.y = "ptid",
all.x = TRUE)
ctrlCol <- ncol(posteriors)
par(mfrow = c(4, 5), mar = rep(3, 4))
aucs <- numeric(ctrlCol - 2)
n1 <- sum(posteriors$control == 1)
n0 <- sum(posteriors$control == 0)
for(i in 2:(ctrlCol - 1)) {
post <- 1 - posteriors[, i]
outcome <- posteriors[, ctrlCol]
rocfit <- roc(outcome ~ post)
subset <- names(posteriors)[i]
auc <- rocfit$auc
try(plot(rocfit, main = paste(subset, round(auc, 3))))
aucs[i - 1] <- auc
}
pvals <- pwilcox(aucs * n0 * n1, n0, n1, lower.tail = FALSE)
pvals <- 2 * pmin(pvals, 1 - pvals)
qvals <- p.adjust(pvals, method = "BY")
subsets <- names(posteriors)[-c(1, ctrlCol)]
result <- data.frame(subsets, aucs, pvals, qvals)
result[order(result$aucs, decreasing = TRUE), ]
# Graph
cells <- colnames(collapsed[[1]])
assignments <- collapsed
reps <- 40
modelList <- list()
nsubsets <- ncol(assignments[[1]])
countCovar <- matrix(0, nrow = nsubsets, ncol = nsubsets)
for(i in 1:reps) {
mat <- t(sapply(assignments, function(x) x[sample(1:nrow(x), 1), ]))
colnames(mat) <- names(cells)
keep <- apply(mat, 2, function(x) any(x != x[1]))
mat <- mat[, keep]
model <- IsingFit::IsingFit(mat, AND = TRUE, plot = TRUE)
modelList[[i]] <- model
#plot(model)
countCovar[keep, keep] <- countCovar[keep, keep] + (model$weiadj != 0) * sign(model$weiadj)
print(i)
}
props <- countCovar / reps
table(props)
threshold <- 0.1
which(props > threshold, arr.ind = TRUE)
props[abs(props) <= threshold] <- 0
sum(props != 0) / 2
# Plotting graph
require(GGally)
library(network)
library(sna)
network <- props
keep <- apply(network, 1, function(x) any(abs(x) >= threshold)) | TRUE
network <- network[keep, keep]
net <- network(props)
subsets <- names(fit$coefficients)
nodes <- ggnet2(network, label = cells)$data
edges <- matrix(nrow = sum(network != 0)/2, ncol = 5)
p <- nrow(network)
row <- 1
for(j in 2:p) {
for(i in 1:(j-1)) {
if(network[i, j] != 0) {
edges[row, ] <- unlist(c(nodes[i, 6:7], nodes[j, 6:7], network[i, j]))
row <- row + 1
}
}
}
edges <- data.frame(edges)
names(edges) <- c("xstart", "ystart", "xend", "yend", "width")
nodes$auc <- aucs[keep]
nodes$qvals <- result$qvals[keep]
nodes$sig <- nodes$qvals < 0.1
names(edges)[5] <- "Dependence"
lims <- max(abs(props))
library(ggplot2)
ggplot() +
scale_colour_gradient2(limits=c(-lims, lims), low="dark red", high = "dark green") +
geom_segment(data = edges, aes(x = xstart, y = ystart,
xend = xend, yend = yend,
col = Dependence,
alpha = abs(Dependence)),
size = 1) +
#scale_fill_gradient2(low = "white", high = "red", limits = c(0.7, 1)) +
scale_fill_gradientn(colours = rainbow(4))+
geom_point(data = nodes, aes(x = x, y = y, fill = auc), shape = 21,
size = 8, col = "grey") +
scale_shape(solid = FALSE) +
geom_text(data = nodes, aes(x = x, y = y, label = nodes$label), size = 1.8) +
theme(axis.line=element_blank(),axis.text.x=element_blank(),
axis.text.y=element_blank(),axis.ticks=element_blank(),
axis.title.x=element_blank(),
axis.title.y=element_blank(),
panel.background=element_blank(),panel.border=element_blank(),panel.grid.major=element_blank(),
panel.grid.minor=element_blank(),plot.background=element_blank())
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.